rm(list = ls())

#setwd("YOUR-WORKING-DIRECTORY")

# install.packages(c("doParallel", "foreach", "magic"))

library("doParallel")
library("foreach")
library("magic")
library("MASS")
library("beepr")
library("list")
library("rr")

set.seed(88)

stopCluster(cl)
cl <- makeCluster(detectCores())
registerDoParallel(cl)

source("replication-functions.R")

necFunc <- c("adiag", "coef.ictreg", "vcov.ictreg", "ginv", "mvrnorm", 
	"coef.rrreg", "vcov.rrreg")

n.bootstrap <- 5000

misspecifiedComparison <- function (n.sample = 1000, data, h) {

    preds <- generateMisspecifiedComparison(n.sample = n.sample, data = data, n.control.items = 4, 
		h = h, grouping = "subgroup", weighting = "cue")	
    
    preds

}

misspecifiedComparisonRr <- function (n.sample = 1000, data, h) {

    preds <- generateMisspecifiedComparisonRr(n.sample = n.sample, data = data, 
		h = h, grouping = "subgroup", weighting = "cue")	
    
    preds

}

corrs <- c(1.0, 0.5, 0.5, 
	       0.5, 1.0, 0.0, 
	       0.5, 0.0, 1.0)

## LIST 
pop.data <- generateMisspecifiedData(n = 1e07, corrs = corrs, gammas = c(1, 0.5, 0.5), 
	deltas = c(1, 0.5, 0.5), n.control.items = 4, p.t = 0.5, n.groups = 5, sq.coef = -1)

h <- tapply(pop.data$z, pop.data$group, mean)

popMeans <- tapply(pop.data$z, pop.data$x1, mean)

index <- rep(0:1, n.bootstrap)

point1 <- foreach(i = 1:n.bootstrap, .combine = "rbind", .export = necFunc) %dopar% {

    misspecifiedComparison(data = pop.data, h = h)	

}

point2 <- foreach(i = 1:n.bootstrap, .combine = "rbind", .export = necFunc) %dopar% {

    misspecifiedComparison(n.sample = 2500, data = pop.data, h = h)	

}

point3 <- foreach(i = 1:n.bootstrap, .combine = "rbind", .export = necFunc) %dopar% {

    misspecifiedComparison(n.sample = 10000, data = pop.data, h = h)	

}

x0.rmse.0 <- c()
x0.rmse.0[1] <- rmse(point1[index == 0, 1], popMeans[1])
x0.rmse.0[2] <- rmse(point2[index == 0, 1], popMeans[1])
x0.rmse.0[3] <- rmse(point3[index == 0, 1], popMeans[1])

x1.rmse.0 <- c()
x1.rmse.0[1] <- rmse(point1[index == 0, 2], popMeans[2])
x1.rmse.0[2] <- rmse(point2[index == 0, 2], popMeans[2])
x1.rmse.0[3] <- rmse(point3[index == 0, 2], popMeans[2])

x0.rmse.1 <- c()
x0.rmse.1[1] <- rmse(point1[index == 1, 1], popMeans[1])
x0.rmse.1[2] <- rmse(point2[index == 1, 1], popMeans[1])
x0.rmse.1[3] <- rmse(point3[index == 1, 1], popMeans[1])

x1.rmse.1 <- c()
x1.rmse.1[1] <- rmse(point1[index == 1, 2], popMeans[2])
x1.rmse.1[2] <- rmse(point2[index == 1, 2], popMeans[2])
x1.rmse.1[3] <- rmse(point3[index == 1, 2], popMeans[2])

all.points <- rbind(point1, point2, point3)


## RANDOMIZED RESPONSE
pop.data.rr <- generateMisspecifiedDataRr(n = 1e07, corrs = corrs,  
	deltas = c(1, 0.5, 0.5), p.t = 0.5, n.groups = 5, sq.coef = -1)

h.rr <- tapply(pop.data.rr$z, pop.data.rr$group, mean)

popMeans.rr <- tapply(pop.data.rr$z, pop.data.rr$x1, mean)

point1.rr <- foreach(i = 1:n.bootstrap, .combine = "rbind", .export = necFunc) %dopar% {

    misspecifiedComparisonRr(data = pop.data.rr, h = h.rr)	

}

point2.rr <- foreach(i = 1:n.bootstrap, .combine = "rbind", .export = necFunc) %dopar% {

    misspecifiedComparisonRr(n.sample = 2500, data = pop.data.rr, h = h.rr)	

}

point3.rr <- foreach(i = 1:n.bootstrap, .combine = "rbind", .export = necFunc) %dopar% {

    misspecifiedComparisonRr(n.sample = 10000, data = pop.data.rr, h = h.rr)	

}

x0.rmse.0.rr <- c()
x0.rmse.0.rr[1] <- rmse(point1.rr[index == 0, 1], popMeans.rr[1])
x0.rmse.0.rr[2] <- rmse(point2.rr[index == 0, 1], popMeans.rr[1])
x0.rmse.0.rr[3] <- rmse(point3.rr[index == 0, 1], popMeans.rr[1])

x1.rmse.0.rr <- c()
x1.rmse.0.rr[1] <- rmse(point1.rr[index == 0, 2], popMeans.rr[2])
x1.rmse.0.rr[2] <- rmse(point2.rr[index == 0, 2], popMeans.rr[2])
x1.rmse.0.rr[3] <- rmse(point3.rr[index == 0, 2], popMeans.rr[2])

x0.rmse.1.rr <- c()
x0.rmse.1.rr[1] <- rmse(point1.rr[index == 1, 1], popMeans.rr[1])
x0.rmse.1.rr[2] <- rmse(point2.rr[index == 1, 1], popMeans.rr[1])
x0.rmse.1.rr[3] <- rmse(point3.rr[index == 1, 1], popMeans.rr[1])

x1.rmse.1.rr <- c()
x1.rmse.1.rr[1] <- rmse(point1.rr[index == 1, 2], popMeans.rr[2])
x1.rmse.1.rr[2] <- rmse(point2.rr[index == 1, 2], popMeans.rr[2])
x1.rmse.1.rr[3] <- rmse(point3.rr[index == 1, 2], popMeans.rr[2])

all.points.rr <- rbind(point1.rr, point2.rr, point3.rr)


pdf("figure-6.pdf", height = 8.5, width = 10.5)
{
	par(mfrow = c(2, 2),
		cex = 0.8,
	    #las = 1, 
	    mar = c(5, 4, 2, 2) + 0.1, 
	    oma = c(1, 9, 1, 1),
	    mgp = c(3, 1, 0),
	    xpd = FALSE)

	## LIST
	plot(popMeans, xlim = c(0.75, 2.75), ylim = c(0.4, 0.9), pch = 15, main = "Predictions under Model Misspecification", 
		xaxt = "n", ylab = "Prevalence of Sensitive Trait", xlab = "X1")
	axis(1, at = c(1, 2) + 0.25, labels = c("0", "1"))
	points(x = 1:2 + 0.25, y = apply(all.points[index == 0, 1:2], 2, mean), pch = 16:17)
	lines(x = c(1, 1) + 0.25, y = quantile(all.points[index == 0, "x1.0"], probs = c(0.025, 0.975)))
	lines(x = c(2, 2) + 0.25, y = quantile(all.points[index == 0, "x1.1"], probs = c(0.025, 0.975)))

	points(x = 1:2 + 0.5, y = apply(all.points[index == 1, 1:2], 2, mean), pch = 16:17)
	lines(x = c(1, 1) + 0.5, y = quantile(all.points[index == 1, "x1.0"], probs = c(0.025, 0.975)), lty = 2)
	lines(x = c(2, 2) + 0.5, y = quantile(all.points[index == 1, "x1.1"], probs = c(0.025, 0.975)), lty = 2)

	legend("topleft", 
		legend = c("Population Mean",
			       "X1 = 0 without Auxiliary Information", 
			       "X1 = 1 without Auxiliary Information", 
			       "X1 = 0 with Auxiliary Information", 
			       "X1 = 1 with Auxiliary Information"), 
		lty = c(NA, rep(1:2, each = 2)), 
		pch = c(15, rep(16:17, times = 2)), 
		bty = "n")

	plot(x = 1:3, y = x0.rmse.0, type = "b", pch = 16, ylim = c(0, 0.06), 
		ylab = "RMSE", xlab = "Sample Size", xaxt = "n", 
		main = "Root Mean Squared Error")
	axis(1, at = 1:3, labels = c("1000", "2500", "10000"))
	lines(x = 1:3, y = x1.rmse.0, type = "b", pch = 17)

	lines(x = 1:3, y = x0.rmse.1, type = "b", pch = 16, lty = 2)
	lines(x = 1:3, y = x1.rmse.1, type = "b", pch = 17, lty = 2)

	abline(h = 0, col = "darkgray")

	## RANDOMIZED RESPONSE
	plot(popMeans.rr, xlim = c(0.75, 2.75), ylim = c(0.4, 0.9), pch = 15, main = "", 
		xaxt = "n", ylab = "Prevalence of Sensitive Trait", xlab = "X1")
	axis(1, at = c(1, 2) + 0.25, labels = c("0", "1"))
	points(x = 1:2 + 0.25, y = apply(all.points.rr[index == 0, 1:2], 2, mean), pch = 16:17)
	lines(x = c(1, 1) + 0.25, y = quantile(all.points.rr[index == 0, "x1.0"], probs = c(0.025, 0.975)))
	lines(x = c(2, 2) + 0.25, y = quantile(all.points.rr[index == 0, "x1.1"], probs = c(0.025, 0.975)))

	points(x = 1:2 + 0.5, y = apply(all.points.rr[index == 1, 1:2], 2, mean), pch = 16:17)
	lines(x = c(1, 1) + 0.5, y = quantile(all.points.rr[index == 1, "x1.0"], probs = c(0.025, 0.975)), lty = 2)
	lines(x = c(2, 2) + 0.5, y = quantile(all.points.rr[index == 1, "x1.1"], probs = c(0.025, 0.975)), lty = 2)

	legend("topleft", 
		legend = c("Population Mean",
			       "X1 = 0", 
			       "X1 = 1", 
			       "X1 = 0 w/ Auxiliary Information", 
			       "X1 = 1 w/ Auxiliary Information"), 
		lty = c(NA, rep(1:2, each = 2)), 
		pch = c(15, rep(16:17, times = 2)), 
		bty = "n")


	plot(x = 1:3, y = x0.rmse.0.rr, type = "b", pch = 16, ylim = c(0, 0.06), 
		ylab = "RMSE", xlab = "Sample Size", xaxt = "n", 
		main = "")
	axis(1, at = 1:3, labels = c("1000", "2500", "10000"))
	lines(x = 1:3, y = x1.rmse.0.rr, type = "b", pch = 17)

	lines(x = 1:3, y = x0.rmse.1.rr, type = "b", pch = 16, lty = 2)
	lines(x = 1:3, y = x1.rmse.1.rr, type = "b", pch = 17, lty = 2)

	abline(h = 0, col = "darkgray")

	mtext("List\nExperiment", side = 2, line = 1.25, outer = TRUE, at = 0.8, font = 2) 
	mtext("Randomized\nResponse", side = 2, line = 1.25, outer = TRUE, at = 0.3, font = 2) 
}
dev.off()

stopCluster(cl)

